# -*- coding: utf-8 -*-

import json
import logging
from datetime import datetime
from urllib.parse import urljoin

import requests
import scrapy
from bs4 import BeautifulSoup

from fire_control_spider.config import DEBUG

from ..base_parser import BasePageParser
from ...utils import ContentProcessor, FileUtils
from .common import update_media_urls, extract_current_page_menu
# Labels that look like a breadcrumb trail on the NFPA site. Nothing in the
# visible part of this module reads this list.
# NOTE(review): confirm whether it is still needed or is imported elsewhere.
categories = ['Home', 'About NFPA', 'Why NFPA Codes and Standards Matter']

class CateListInBuildingAndLifeParser(BasePageParser):
    """Parser for the NFPA "Building and Life Safety" landing page.

    The parser does not extract content from the landing page itself. It
    reads the anchors of the "Education and Research" menu and yields one
    follow-up request per distinct link, so that the spider can process
    each linked page.
    """

    # The single page this parser is responsible for (no trailing slash).
    _LANDING_URL = 'https://www.nfpa.org/education-and-research/building-and-life-safety'
    # CSS selector matching the anchors of the "Education and Research" menu.
    _MENU_LINK_SELECTOR = '#a11y-Education-and-Research-menu ul li a'
    # Only these URL prefixes can be fetched; everything else is skipped.
    _FETCHABLE_PREFIXES = ('http://', 'https://')

    _log = logging.getLogger(__name__)

    def can_handle(self, response):
        """Return True when *response* is the landing page.

        A trailing slash on ``response.url`` is ignored so that both
        spellings of the same address are recognised.
        """
        return response.url.rstrip('/') == self._LANDING_URL

    def parse(self, response):
        """Yield one request per distinct menu link found in *response*.

        Each request is created through ``self.make_request`` with
        ``use_selenium=True``, ``self.spider.parse`` as callback, and a
        ``meta`` dict carrying the landing page URL (``category_url``) and
        the ``page_type`` tag ``'nfpa_news_details'``.

        When ``DEBUG`` is truthy only the first request is yielded.
        """
        soup = BeautifulSoup(response.text, 'lxml')
        seen_urls = set()

        for anchor in soup.select(self._MENU_LINK_SELECTOR):
            href = (anchor.get('href') or '').strip()
            if not href:
                continue

            full_url = urljoin(response.url, href)

            # hrefs such as "javascript:..." or "mailto:..." survive urljoin
            # unchanged and cannot be fetched, so they are skipped here
            # instead of being handed to the request factory.
            if not full_url.startswith(self._FETCHABLE_PREFIXES):
                continue

            # The same target may be listed more than once in the menu.
            if full_url in seen_urls:
                continue
            seen_urls.add(full_url)

            self._log.debug('Queueing menu link: %s', full_url)
            yield self.make_request(
                full_url,
                use_selenium=True,
                callback=self.spider.parse,
                meta={
                    'category_url': response.url,
                    'page_type': 'nfpa_news_details',
                },
            )

            # In debug runs a single follow-up request is enough.
            if DEBUG:
                break
